@@ -49,6 +49,8 @@ module Agents |
||
49 | 49 |
|
50 | 50 |
Set `user_agent` to a custom User-Agent name if the website does not like the default value ("Faraday v#{Faraday::VERSION}"). |
51 | 51 |
|
52 |
+ The `headers` field is optional. When present, it should be a hash of headers to send with the request. |
|
53 |
+ |
|
52 | 54 |
The WebsiteAgent can also scrape based on incoming events. It will scrape the url contained in the `url` key of the incoming event payload. |
53 | 55 |
MD |
54 | 56 |
|
@@ -111,6 +113,10 @@ module Agents |
||
111 | 113 |
errors.add(:base, "user_agent must be a string") unless options['user_agent'].is_a?(String) |
112 | 114 |
end |
113 | 115 |
|
116 |
+ unless headers.is_a?(Hash) |
|
117 |
+ errors.add(:base, "if provided, headers must be a hash") |
|
118 |
+ end |
|
119 |
+ |
|
114 | 120 |
begin |
115 | 121 |
basic_auth_credentials() |
116 | 122 |
rescue => e |
@@ -287,6 +293,8 @@ module Agents |
||
287 | 293 |
|
288 | 294 |
def faraday |
289 | 295 |
@faraday ||= Faraday.new { |builder| |
296 |
+ builder.headers = headers if headers.length > 0 |
|
297 |
+ |
|
290 | 298 |
if (user_agent = options['user_agent']).present? |
291 | 299 |
builder.headers[:user_agent] = user_agent |
292 | 300 |
end |
@@ -320,5 +328,9 @@ module Agents |
||
320 | 328 |
end |
321 | 329 |
raise "bad value for basic_auth: #{value.inspect}" |
322 | 330 |
end |
331 |
+ |
|
332 |
+ def headers |
|
333 |
+ options['headers'].presence || {} |
|
334 |
+ end |
|
323 | 335 |
end |
324 | 336 |
end |
@@ -21,28 +21,71 @@ describe Agents::WebsiteAgent do |
||
21 | 21 |
@checker.save! |
22 | 22 |
end |
23 | 23 |
|
24 |
- describe "#check" do |
|
24 |
+ describe "validations" do |
|
25 |
+ before do |
|
26 |
+ @checker.should be_valid |
|
27 |
+ end |
|
28 |
+ |
|
25 | 29 |
it "should validate the integer fields" do |
26 |
- @checker.options['expected_update_period_in_days'] = "nonsense" |
|
27 |
- lambda { @checker.save! }.should raise_error; |
|
28 | 30 |
@checker.options['expected_update_period_in_days'] = "2" |
31 |
+ @checker.should be_valid |
|
32 |
+ |
|
33 |
+ @checker.options['expected_update_period_in_days'] = "nonsense" |
|
34 |
+ @checker.should_not be_valid |
|
35 |
+ end |
|
36 |
+ |
|
37 |
+ it "should validate uniqueness_look_back" do |
|
29 | 38 |
@checker.options['uniqueness_look_back'] = "nonsense" |
30 |
- lambda { @checker.save! }.should raise_error; |
|
39 |
+ @checker.should_not be_valid |
|
40 |
+ |
|
41 |
+ @checker.options['uniqueness_look_back'] = "2" |
|
42 |
+ @checker.should be_valid |
|
43 |
+ end |
|
44 |
+ |
|
45 |
+ it "should validate headers" do |
|
46 |
+ @checker.options['headers'] = "blah" |
|
47 |
+ @checker.should_not be_valid |
|
48 |
+ |
|
49 |
+ @checker.options['headers'] = "" |
|
50 |
+ @checker.should be_valid |
|
51 |
+ |
|
52 |
+ @checker.options['headers'] = {} |
|
53 |
+ @checker.should be_valid |
|
54 |
+ |
|
55 |
+ @checker.options['headers'] = { 'foo' => 'bar' } |
|
56 |
+ @checker.should be_valid |
|
57 |
+ end |
|
58 |
+ |
|
59 |
+ it "should validate mode" do |
|
31 | 60 |
@checker.options['mode'] = "nonsense" |
32 |
- lambda { @checker.save! }.should raise_error; |
|
33 |
- @checker.options = @site |
|
61 |
+ @checker.should_not be_valid |
|
62 |
+ |
|
63 |
+ @checker.options['mode'] = "on_change" |
|
64 |
+ @checker.should be_valid |
|
65 |
+ |
|
66 |
+ @checker.options['mode'] = "all" |
|
67 |
+ @checker.should be_valid |
|
68 |
+ |
|
69 |
+ @checker.options['mode'] = "" |
|
70 |
+ @checker.should be_valid |
|
34 | 71 |
end |
35 | 72 |
|
36 | 73 |
it "should validate the force_encoding option" do |
74 |
+ @checker.options['force_encoding'] = '' |
|
75 |
+ @checker.should be_valid |
|
76 |
+ |
|
37 | 77 |
@checker.options['force_encoding'] = 'UTF-8' |
38 |
- lambda { @checker.save! }.should_not raise_error; |
|
78 |
+ @checker.should be_valid |
|
79 |
+ |
|
39 | 80 |
@checker.options['force_encoding'] = ['UTF-8'] |
40 |
- lambda { @checker.save! }.should raise_error; |
|
81 |
+ @checker.should_not be_valid |
|
82 |
+ |
|
41 | 83 |
@checker.options['force_encoding'] = 'UTF-42' |
42 |
- lambda { @checker.save! }.should raise_error; |
|
43 |
- @checker.options = @site |
|
84 |
+ @checker.should_not be_valid |
|
44 | 85 |
end |
86 |
+ end |
|
45 | 87 |
|
88 |
+ describe "#check" do |
|
46 | 89 |
it "should check for changes (and update Event.expires_at)" do |
47 | 90 |
lambda { @checker.check }.should change { Event.count }.by(1) |
48 | 91 |
event = Event.last |
@@ -377,10 +420,10 @@ describe Agents::WebsiteAgent do |
||
377 | 420 |
end |
378 | 421 |
end |
379 | 422 |
|
380 |
- describe "checking with User-Agent" do |
|
423 |
+ describe "checking with headers" do |
|
381 | 424 |
before do |
382 | 425 |
stub_request(:any, /example/). |
383 |
- with(headers: { 'User-Agent' => 'Sushi' }). |
|
426 |
+ with(headers: { 'foo' => 'bar', 'user_agent' => /Faraday/ }). |
|
384 | 427 |
to_return(:body => File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), :status => 200) |
385 | 428 |
@site = { |
386 | 429 |
'name' => "XKCD", |
@@ -388,12 +431,10 @@ describe Agents::WebsiteAgent do |
||
388 | 431 |
'type' => "html", |
389 | 432 |
'url' => "http://www.example.com", |
390 | 433 |
'mode' => 'on_change', |
434 |
+ 'headers' => { 'foo' => 'bar' }, |
|
391 | 435 |
'extract' => { |
392 | 436 |
'url' => { 'css' => "#comic img", 'attr' => "src" }, |
393 |
- 'title' => { 'css' => "#comic img", 'attr' => "alt" }, |
|
394 |
- 'hovertext' => { 'css' => "#comic img", 'attr' => "title" } |
|
395 |
- }, |
|
396 |
- 'user_agent' => "Sushi" |
|
437 |
+ } |
|
397 | 438 |
} |
398 | 439 |
@checker = Agents::WebsiteAgent.new(:name => "ua", :options => @site) |
399 | 440 |
@checker.user = users(:bob) |
@@ -403,7 +444,6 @@ describe Agents::WebsiteAgent do |
||
403 | 444 |
describe "#check" do |
404 | 445 |
it "should check for changes" do |
405 | 446 |
lambda { @checker.check }.should change { Event.count }.by(1) |
406 |
- lambda { @checker.check }.should_not change { Event.count } |
|
407 | 447 |
end |
408 | 448 |
end |
409 | 449 |
end |